#*-*coding:utf-8*-*

from scrapy.spiders import BaseSpider

from scrapy import selector
from newSpider import items

class samSpider(BaseSpider):
    """Spider that collects link entries from two DMOZ Python directory pages.

    Each ``<li>`` found under ``//fieldset/ul`` on a start page is turned into
    one ``items.DmozItem`` carrying its title, link and description.

    NOTE(review): ``BaseSpider`` appears to be a deprecated alias of
    ``scrapy.Spider`` in recent Scrapy releases -- confirm against the
    installed version and consider migrating the base class.
    """

    name = "sam"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        """Extract one item per directory entry from a downloaded page.

        Args:
            response: The response for one of ``start_urls``; it must support
                XPath queries through ``response.xpath``.

        Returns:
            A list of ``items.DmozItem`` objects. Every field holds the list
            of strings produced by ``extract()`` for that entry:
            ``title`` from ``a/text()``, ``link`` from ``a/@href`` and
            ``desc`` from the entry's own ``text()`` nodes.
        """
        # Query the response through its built-in selector shortcut instead of
        # building a standalone HtmlXPathSelector, which is deprecated.
        sites = response.xpath('//fieldset/ul/li')
        myitems = []
        for site in sites:
            item = items.DmozItem()
            item['title'] = site.xpath('a/text()').extract()
            item['link'] = site.xpath('a/@href').extract()
            item['desc'] = site.xpath('text()').extract()
            myitems.append(item)
        return myitems


'''
	def parse(self, response):
		fileName = response.url.split("/")[-2] 	# take the file name from the URL
		# Create a file named fileName and write the content of the response object into it
		open(fileName, 'wb').write(response.body) 
'''